library(tidyverse)
library(ggplot2)
library(plotly)
library(DT)
# Load the SNP table from a tab-separated text file that has a header row.
SNPs <- read.table("23andMe_complete.txt", header = TRUE, sep = "\t")

# Turn chromosome into an ordered factor with levels 1-22, X, Y, MT.
# Any value outside these levels becomes NA.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Bar chart of the number of SNP rows per chromosome.
# The title is stored on `d` itself; previously it was added in a separate
# unassigned expression, so printing `d` afterwards showed an untitled plot.
d <- ggplot(data = SNPs) +
  geom_bar(mapping = aes(x = chromosome)) +
  ggtitle("Number of SNPs per chromosome")
d
# Genotype groups, listed in the order they should appear in the legend.
dinuc <- c("AA", "AC", "AT", "AG", "CC", "CG", "CT", "GG", "GT", "TT")
mono <- c("A", "T", "G", "C")
others <- c("DD", "II", "DI", "D", "I", "--")

# Named colour lookup keyed by genotype: two-letter genotypes are red,
# single-letter genotypes yellow, D/I genotypes green, and "--" black.
colors <- c(
  setNames(rep("red", length(dinuc)), dinuc),
  setNames(rep("yellow", length(mono)), mono),
  "DD" = "green", "II" = "green", "D" = "green", "DI" = "green",
  "I" = "green",
  "--" = "black"
)

# Stacked bar chart: SNP count per chromosome, filled by genotype.
d <- ggplot(data = SNPs) +
  geom_bar(mapping = aes(x = chromosome, fill = genotype)) +
  labs(
    title = "Number of SNPs per Chromosome",
    x = "Chromosome Number",
    y = "SNP count"
  ) +
  scale_fill_manual(
    values = colors,
    name = "Nucleotides",
    breaks = c(dinuc, mono, others)
  )
d
# Nucleotide counts per chromosome
# Per-genotype bar charts: one facet per genotype, arranged in two columns,
# each showing the row count per chromosome. The x-axis labels are shrunk
# (size 4) so all chromosome names fit within a facet.
d <- ggplot(data = SNPs) +
  geom_bar(
    position = position_dodge(),
    mapping = aes(x = chromosome, fill = genotype)
  ) +
  facet_wrap(~genotype, ncol = 2) +
  ggtitle("Number of Nucleotides per Chromosome") +
  xlab("Chromosome Number") +
  ylab("Number of Nucleotides") +
  theme(axis.text.x = element_text(angle = 0, size = 4))

# Static version.
d

# Interactive version of the same plot. The plot object is reused rather
# than rebuilt from scratch with an identical specification.
ggplotly(d)
# Keep only the rows whose chromosome is "Y". which() skips rows where the
# comparison is NA, so they are excluded just as subset() would exclude them.
y_rows <- which(SNPs$chromosome == "Y")
SNP_table_Y <- SNPs[y_rows, ]

# Render the chromosome Y rows as an interactive table.
datatable(SNP_table_Y)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html